In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import brewer2mpl
import colorsys
import math
import dendropy as dp

from datetime import datetime
from Bio import AlignIO, SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from Levenshtein import distance
from itertools import combinations, product
from time import time
from __future__ import division
from collections import Counter, defaultdict
from copy import deepcopy
from random import shuffle, choice, sample
from scipy.stats.mstats import mquantiles
from scipy.stats import norm, expon, poisson
from scipy.misc import comb
from IPython.display import Math
%matplotlib inline

In [14]:
tree = dp.Tree.get_from_path('H3N8_Seg1_Skyride_run2_3_4_5_2014-0915.nexus', 'nexus')
tree


Out[14]:
<Tree object at 0x11196ad10>

Amongst the H3N8 isolates, here is the full range of patristic distances present in the PB2 gene tree.


In [15]:
patristic_distances = dp.treecalc.PatristicDistanceMatrix(tree=tree).distances()
plt.hist(patristic_distances)
plt.xlabel('Patristic Distance')
plt.ylabel('Counts')
plt.title('Histogram of Patristic \n Distances in the PB2 Tree')


Out[15]:
<matplotlib.text.Text at 0x114b2a790>

In [16]:
transmission_graph = nx.read_gpickle('Minto Flats.pkl')
transmission_graph.nodes(data=True)
for node in transmission_graph.nodes(data=True):
    if node[1]['subtype'] != 'H3N8':
        transmission_graph.remove_node(node[0])
        # transmission_graph.nodes(data=True)

In [17]:
len(transmission_graph.nodes())


Out[17]:
319

Because I don't have the taxa names identical between the tree and the transmission graph, therefore, I will use a dictionary mapping to map the taxa name in the graph to the taxa name in the tree.


In [18]:
taxadict = {str(t).split("'")[0].split("|")[0].replace("_", " "):t for t in tree.taxon_set}
# taxadict

In [19]:
graph_pds = []
for (taxa1, taxa2) in transmission_graph.edges():
    # Note here that some of the taxa names present in the network are absent from the trees.
    # As such, I will have to ask Kyle to re-run the H3N8 trees. Might take the chance to show
    # him some Python.
    if taxa1 in taxadict.keys() and taxa2 in taxadict.keys():
        t1 = taxadict[taxa1]
        t2 = taxadict[taxa2]
        pd = dp.treecalc.patristic_distance(tree, t1, t2)
        graph_pds.append(pd)

Aaaaand here, let's show what patristic distances we're capturing!


In [20]:
plt.hist(graph_pds)
plt.xlabel('Patristic Distance')
plt.ylabel('Counts')
plt.title('Histogram of Patristic \n Distances Captured by the Network')
plt.show()



In [20]:


In [2]:


In [ ]: